library(tidyverse)
library(magrittr)
library(parallel)
library(pander)
library(here)
library(scales)
library(ggpubr)
library(kableExtra)
library(edgeR)
library(DT)
library(ggrepel)
library(pheatmap)
library(ggdendro)
if (interactive()) setwd(here::here())
theme_set(theme_bw())
cores <- detectCores() - 2
load(
here::here("1_Psen2S4Ter/R/output/1_1_DE.RData")
)
jellyfish v2.3.0 was used to count kmers of trimmed
fastq files that had been filtered for rRNA sequences. This
was performed for 5 values: \(k = 5, 6, 7, 8,
9, 10\). Lower values of \(k\)
lose specificity in comparison to higher values, however as \(k\) increases, the exponential increase of
possible kmers causes limitations due to computational processing
time.
k5files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k5",
pattern = ".dumps", full.names = TRUE
)
k5counts <- lapply(k5files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k5dge <- k5counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
DGEList() %>%
calcNormFactors()
k5dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k5dge$samples$group <- colnames(k5dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k5dist <- k5dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 5-mers")
k5labels <- k5dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k5heat <- k5dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "5-mer counts heatmap", labels_row = k5labels)
k5heat$tree_row$labels <- k5labels
k5den <- ggdendrogram(k5heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 5-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k5pca <- k5dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k5pca)$importance %>% pander(split.tables = Inf)
k5pcaPlot <- k5pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k5dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k5pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k5pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 5"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k5pcaRrna <- k5pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k5dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k5pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 5"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k5design <- model.matrix(~rRNA, data = k5dge$samples)
k5voom <- voom(k5dge, design = k5design)
k5fit <- lmFit(k5voom, design = k5design)
k5eBayes <- eBayes(k5fit)
k5topTable <- k5eBayes %>%
topTable(coef = colnames(k5design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
k6files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k6",
pattern = ".dumps", full.names = TRUE
)
k6counts <- lapply(k6files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k6dge <- k6counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
DGEList() %>%
calcNormFactors()
k6dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k6dge$samples$group <- colnames(k6dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k6dist <- k6dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 6-mers")
k6labels <- k6dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k6heat <- k6dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "6-mer counts heatmap", labels_row = k6labels)
k6heat$tree_row$labels <- k6labels
k6den <- ggdendrogram(k6heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 6-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k6pca <- k6dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k6pca)$importance %>% pander(split.tables = Inf)
# Plot PCA
k6pcaPlot <- k6pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k6dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k6pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k6pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 6"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k6pcaRrna <- k6pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k6dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k6pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 6"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k6design <- model.matrix(~rRNA, data = k6dge$samples)
k6voom <- voom(k6dge, design = k6design)
k6fit <- lmFit(k6voom, design = k6design)
k6eBayes <- eBayes(k6fit)
k6topTable <- k6eBayes %>%
topTable(coef = colnames(k6design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
k7files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k7",
pattern = ".dumps", full.names = TRUE
)
k7counts <- lapply(k7files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k7dge <- k7counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
DGEList() %>%
calcNormFactors()
k7dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k7dge$samples$group <- colnames(k7dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k7dist <- k7dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 7-mers")
k7labels <- k7dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k7heat <- k7dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "7-mer counts heatmap", labels_row = k7labels)
k7heat$tree_row$labels <- k7labels
k7den <- ggdendrogram(k7heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 7-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k7pca <- k7dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k7pca)$importance %>% pander(split.tables = Inf)
# Plot PCA
k7pcaPlot <- k7pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k7dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k7pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k7pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 7"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k7pcaRrna <- k7pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k7dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k7pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 7"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k7design <- model.matrix(~rRNA, data = k7dge$samples)
k7voom <- voom(k7dge, design = k7design)
k7fit <- lmFit(k7voom, design = k7design)
k7eBayes <- eBayes(k7fit)
k7topTable <- k7eBayes %>%
topTable(coef = colnames(k7design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
k8files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k8",
pattern = ".dumps", full.names = TRUE
)
k8counts <- lapply(k8files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k8dge <- k8counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
DGEList() %>%
calcNormFactors()
k8dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k8dge$samples$group <- colnames(k8dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k8dist <- k8dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 8-mers")
k8labels <- k8dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k8heat <- k8dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "8-mer counts heatmap", labels_row = k8labels)
k8heat$tree_row$labels <- k8labels
k8den <- ggdendrogram(k8heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 8-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k8pca <- k8dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k8pca)$importance %>% pander(split.tables = Inf)
# Plot PCA
k8pcaPlot <- k8pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k8dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k8pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k8pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 8"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k8pcaRrna <- k8pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k8dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k8pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 8"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k8design <- model.matrix(~rRNA, data = k8dge$samples)
k8voom <- voom(k8dge, design = k8design)
k8fit <- lmFit(k8voom, design = k8design)
k8eBayes <- eBayes(k8fit)
k8topTable <- k8eBayes %>%
topTable(coef = colnames(k8design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
k9files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k9",
pattern = ".dumps", full.names = TRUE
)
k9counts <- lapply(k9files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k9dge <- k9counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
DGEList() %>%
calcNormFactors()
k9dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k9dge$samples$group <- colnames(k9dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k9dist <- k9dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 9-mers")
k9labels <- k9dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k9heat <- k9dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "9-mer counts heatmap", labels_row = k9labels)
k9heat$tree_row$labels <- k9labels
k9den <- ggdendrogram(k9heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 9-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k9pca <- k9dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k9pca)$importance %>% pander(split.tables = Inf)
# Plot PCA
k9pcaPlot <- k9pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k9dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k9pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k9pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 9"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k9pcaRrna <- k9pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k9dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k9pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 9"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k9design <- model.matrix(~rRNA, data = k9dge$samples)
k9voom <- voom(k9dge, design = k9design)
k9fit <- lmFit(k9voom, design = k9design)
k9eBayes <- eBayes(k9fit)
k9topTable <- k9eBayes %>%
topTable(coef = colnames(k9design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
k10files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k10",
pattern = ".dumps", full.names = TRUE
)
k10counts <- lapply(k10files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k10dge <- k10counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
DGEList() %>%
calcNormFactors()
k10dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k10dge$samples$group <- colnames(k10dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k10dist <- k10dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 10-mers")
k10labels <- k10dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k10heat <- k10dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "10-mer counts heatmap", labels_row = k10labels)
k10heat$tree_row$labels <- k10labels
k10den <- ggdendrogram(k10heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 10-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k10pca <- k10dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k10pca)$importance %>% pander(split.tables = Inf)
# Plot PCA
k10pcaPlot <- k10pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k10dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k10pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k10pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 10"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k10pcaRrna <- k10pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k10dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k10pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 10"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k10design <- model.matrix(~rRNA, data = k10dge$samples)
k10voom <- voom(k10dge, design = k10design)
k10fit <- lmFit(k10voom, design = k10design)
k10eBayes <- eBayes(k10fit)
k10topTable <- k10eBayes %>%
topTable(coef = colnames(k10design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
k11files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k11",
pattern = ".dumps", full.names = TRUE
)
k11counts <- lapply(k11files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k11dge <- k11counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
replace(is.na(.), 0) %>%
DGEList() %>%
calcNormFactors()
k11dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k11dge$samples$group <- colnames(k11dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k11dist <- k11dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 11-mers")
k11labels <- k11dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k11heat <- k11dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "11-mer counts heatmap", labels_row = k11labels)
k11heat$tree_row$labels <- k11labels
k11den <- ggdendrogram(k11heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 11-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k11pca <- k11dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k11pca)$importance %>% pander(split.tables = Inf)
# Plot PCA
k11pcaPlot <- k11pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k11dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k11pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k11pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 11"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k11pcaRrna <- k11pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k11dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k11pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 11"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k11design <- model.matrix(~rRNA, data = k11dge$samples)
k11voom <- voom(k11dge, design = k11design)
k11fit <- lmFit(k11voom, design = k11design)
k11eBayes <- eBayes(k11fit)
k11topTable <- k11eBayes %>%
topTable(coef = colnames(k11design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
k12files <- list.files(
"/mnt/hpcfs/a1647910/20200310_rRNADepletion/1_Psen2S4Ter/analysis-kmer/results/jellyfish/k12",
pattern = ".dumps", full.names = TRUE
)
k12counts <- lapply(k12files, function(x){
read_delim(x, col_names = c("mer", basename(x)), delim = " ") %>%
set_colnames(str_remove_all(colnames(.), "_6month_.+_F3|[0-9]*_Ps2Ex3M1_|\\.dumps"))
}) %>%
purrr::reduce(full_join) %>%
dplyr::select(mer, contains(c("WT", "Heter")))
k12dge <- k12counts %>%
as.data.frame() %>%
column_to_rownames("mer") %>%
replace(is.na(.), 0) %>%
DGEList() %>%
calcNormFactors()
k12dge$samples %<>%
rownames_to_column("rowname") %>%
mutate(sample = rowname) %>%
left_join(addInfo) %>%
column_to_rownames("rowname")
k12dge$samples$group <- colnames(k12dge) %>%
str_extract("(WT|Heter)") %>%
factor(levels = c("WT", "Heter"))
k12dist <- k12dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
pivot_longer(everything(), names_to = "sample", values_to = "count") %>%
ggplot(aes(x=count, colour = sample)) +
geom_density() +
labs(x = "intensity", title = "Distribution of 12-mers")
k12labels <- k12dge$samples %>%
mutate(label = paste0(sample, "\n", percent(rRNA, accuracy = 0.01), " rRNA")) %>%
.$label
k12heat <- k12dge %>%
cpm(log = TRUE) %>%
as.data.frame() %>%
t() %>%
pheatmap(silent = TRUE, cluster_cols = FALSE,
show_colnames = FALSE, fontsize = 9,
fontsize_row = 10, border_color = NA,
main = "12-mer counts heatmap", labels_row = k12labels)
k12heat$tree_row$labels <- k12labels
k12den <- ggdendrogram(k12heat$tree_row, rotate = TRUE) +
labs(title = "Hierarchical clustering of 12-mer counts") +
theme(plot.title = element_text(size = 12))
# Assess cpm values to make sure PCA results are not heavily skewed by highly expressed genes
k12pca <- k12dge %>%
cpm(log = TRUE) %>%
t() %>%
prcomp()
# Quick inspection to check whether first two PCA components capture most of the variability
summary(k12pca)$importance %>% pander(split.tables = Inf)
# Plot PCA
k12pcaPlot <- k12pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k12dge$samples) %>%
ggplot(aes(PC1, PC2, colour = group, label = rRNA)) +
geom_point(alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
labs(
x = paste0("PC1 (", percent(summary(k12pca)$importance[2, "PC1"]), ")"),
y = paste0("PC2 (", percent(summary(k12pca)$importance[2, "PC2"]), ")"),
colour = "Genotype",
title = "k = 12"
) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k12pcaRrna <- k12pca$x %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
as_tibble() %>%
dplyr::select(sample, PC1, PC2) %>%
left_join(k12dge$samples) %>%
ggplot(aes(PC1, rRNA, label = rRNA)) +
geom_point(aes(colour = group), alpha = 0.8, size = 3) +
geom_text_repel(show.legend = FALSE) +
geom_smooth(method = "lm") +
labs(
x = paste0("PC1 (", percent(summary(k12pca)$importance[2, "PC1"]), ")"),
y = "rRNA proportion",
colour = "Genotype",
title = "k = 12"
) +
scale_y_continuous(labels = percent) +
scale_colour_discrete(labels = c("Wildtype", "Mutant"))
k12design <- model.matrix(~rRNA, data = k12dge$samples)
k12voom <- voom(k12dge, design = k12design)
k12fit <- lmFit(k12voom, design = k12design)
k12eBayes <- eBayes(k12fit)
k12topTable <- k12eBayes %>%
topTable(coef = colnames(k12design)[2], sort.by = "p", n = Inf) %>%
set_colnames(str_remove(colnames(.), "ID\\.")) %>%
rownames_to_column("mer") %>%
mutate(BY = p.adjust(P.Value, "BY")) %>%
mutate(DE = BY < 0.05) %>%
dplyr::select(
mer,
AveExpr,
logFC,
P.Value,
FDR = adj.P.Val,
BY,
t,
DE,
everything(),
-B
) %>%
as_tibble()
ggarrange(
k5dist, k6dist, k7dist, k8dist, k9dist, k10dist, k11dist, k12dist,
ncol = 2, nrow = 4, common.legend = TRUE, legend = "bottom"
)
Distributions of kmer counts for each value of \(k\)
ggarrange(
k5pcaPlot, k6pcaPlot, k7pcaPlot, k8pcaPlot, k9pcaPlot, k10pcaPlot, k11pcaPlot, k12pcaPlot,
ncol = 2, nrow = 4, common.legend = TRUE, legend = "bottom"
)
Principal component analysis for all values of \(k\). As \(k\) increases, WT and mutant groups start to separate.
ggarrange(
k5pcaRrna, k6pcaRrna, k7pcaRrna, k8pcaRrna, k9pcaRrna, k10pcaRrna, k11pcaRrna, k12pcaRrna,
ncol = 2, nrow = 4, common.legend = TRUE, legend = "bottom"
)
Principal component analysis for all values of \(k\). As \(k\) increases, WT and mutant groups start to separate.
ggarrange(
k5den, k6den, k7den, k8den, k9den, k10den, k11den, k12den,
ncol = 2, nrow = 4, common.legend = TRUE, legend = "bottom"
)
Hierarchical clustering of samples based on kmer counts. As \(k\) increases, WT and mutant groups start to cluster.
topRes <- function(x, cap){
x %>%
dplyr::select(mer, AveExpr, logFC, P.Value, FDR, BY, DE) %>%
mutate(
AveExpr = format(round(AveExpr, 2), nsmall = 2),
logFC = format(round(logFC, 2), nsmall = 2),
P.Value = sprintf("%.2e", P.Value),
FDR = sprintf("%.2e", FDR),
BY = sprintf("%.2e", BY)
) %>%
dplyr::slice(1:100) %>%
datatable(
options = list(pageLength = 10),
class = "striped hover condensed responsive",
filter = "top",
caption = cap
)
}
topRes(k5topTable,
cap = paste(
"The top 100 differentially expressed 5-mers.",
nrow(dplyr::filter(k5topTable, DE)),
"of",
nrow(k5topTable),
"detected sequences were classified as DE with BY p-value < 0.05."
)
)
k5topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 5")
Histogram of p-values. Values follow the expected distribution when there are many differences.
k5topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
# dplyr::filter(-log10(P.Value) > 4 | logFC > 4 | logFC < -2),
dplyr::filter(-log10(P.Value) > 4 | logFC > 3 | logFC < -2.5),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 5") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
topRes(k6topTable,
cap = paste(
"The top 100 differentially expressed 6-mers.",
nrow(dplyr::filter(k6topTable, DE)),
"of",
nrow(k6topTable),
"detected sequences were classified as DE with a BY p-value < 0.05."
)
)
k6topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 6")
Histogram of p-values. Values follow the expected distribution when there are some differences.
k6topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
dplyr::filter(-log10(P.Value) > 6 | logFC > 6 | logFC < -2.3),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 6") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
topRes(k7topTable,
cap = paste(
"The top 100 differentially expressed 7-mers.",
nrow(dplyr::filter(k7topTable, DE)),
"of",
nrow(k7topTable),
"detected sequences were classified as DE with a BY p-value < 0.05."
)
)
k7topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 7")
Histogram of p-values. Values follow the expected distribution when there are many differences.
k7topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
dplyr::filter(-log10(P.Value) > 6.4 | logFC > 10 | logFC < -5),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 7") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
topRes(k8topTable,
cap = paste(
"The top 100 differentially expressed 8-mers.",
nrow(dplyr::filter(k8topTable, DE)),
"of",
nrow(k8topTable),
"detected sequences were classified as DE with a BY p-value < 0.05."
)
)
k8topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 8")
Histogram of p-values. Values follow the expected distribution when there are many differences.
k8topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
dplyr::filter(-log10(P.Value) > 7.2 | logFC > 12.5 | logFC < -8),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 8") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
topRes(k9topTable,
cap = paste(
"The top 100 differentially expressed 9-mers.",
nrow(dplyr::filter(k9topTable, DE)),
"of",
nrow(k9topTable),
"detected sequences were classified as DE with a BY p-value < 0.05."
)
)
k9topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 9")
Histogram of p-values. Values follow the expected distribution when there are many differences.
k9topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
dplyr::filter(DE & -log10(P.Value) > 7.5 | logFC < -10 | logFC > 15),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 9") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
topRes(k10topTable,
cap = paste(
"The top 100 differentially expressed 10-mers.",
nrow(dplyr::filter(k10topTable, DE)),
"of",
nrow(k10topTable),
"detected sequences were classified as DE with a BY p-value < 0.05."
)
)
k10topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 10")
Histogram of p-values. Values follow the expected distribution when there are many differences.
k10topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
dplyr::filter(-log10(P.Value) > 8.5 | logFC > 20 | logFC < -13.5),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 10") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
topRes(k11topTable,
cap = paste(
"The top 100 differentially expressed 11-mers.",
nrow(dplyr::filter(k11topTable, DE)),
"of",
nrow(k11topTable),
"detected sequences were classified as DE with a BY p-value < 0.05."
)
)
k11topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 11")
Histogram of p-values. Values follow the expected distribution when there are many differences.
k11topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
dplyr::filter(-log10(P.Value) > 8.5 | logFC > 20 | logFC < -13.5),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 11") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
topRes(k12topTable,
cap = paste(
"The top 100 differentially expressed 12-mers.",
nrow(dplyr::filter(k12topTable, DE)),
"of",
nrow(k12topTable),
"detected sequences were classified as DE with a BY p-value < 0.05."
)
)
k12topTable %>%
ggplot(aes(P.Value)) +
geom_histogram(
binwidth = 0.02,
colour = "black", fill = "grey90"
) +
labs(title = "k = 12")
Histogram of p-values. Values follow the expected distribution when there are many differences.
k12topTable %>%
ggplot(aes(logFC, -log10(P.Value), colour = DE)) +
geom_point(alpha = 0.5) +
scale_colour_manual(values = c("black", "grey50", "red")) +
geom_text_repel(
data = . %>%
dplyr::filter(-log10(P.Value) > 8.5 | logFC > 20 | logFC < -13.5),
aes(label = mer, color = "black")
) +
labs(x = "logFC", y = expression(paste(-log[10], "(p)")), title = "k = 11") +
theme_bw() +
theme(legend.position = "none")
Volcano plot showing -log10(p-value) against logFC. Kmers classified as DE with BY p-value < 0.05 are highlighted in red.
save(
k5counts,
k6counts,
k7counts,
k8counts,
k9counts,
k10counts,
k5topTable,
k6topTable,
k7topTable,
k8topTable,
k9topTable,
k10topTable,
file = here::here(
"1_Psen2S4Ter/R/output/1_2_kmer.RData"
)
)
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Adelaide
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggdendro_0.2.0 pheatmap_1.0.12 ggrepel_0.9.6 DT_0.33
## [5] edgeR_4.4.0 limma_3.62.1 kableExtra_1.4.0 ggpubr_0.6.0
## [9] scales_1.3.0 here_1.0.1 pander_0.6.5 magrittr_2.0.3
## [13] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [17] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [21] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 viridisLite_0.4.2 farver_2.1.2 fastmap_1.2.0
## [5] digest_0.6.37 timechange_0.3.0 lifecycle_1.0.4 statmod_1.5.0
## [9] compiler_4.4.2 rlang_1.1.4 sass_0.4.9 tools_4.4.2
## [13] utf8_1.2.4 yaml_2.3.10 knitr_1.49 ggsignif_0.6.4
## [17] labeling_0.4.3 htmlwidgets_1.6.4 bit_4.5.0 xml2_1.3.6
## [21] RColorBrewer_1.1-3 abind_1.4-8 withr_3.0.2 grid_4.4.2
## [25] fansi_1.0.6 colorspace_2.1-1 MASS_7.3-61 cli_3.6.3
## [29] rmarkdown_2.28 crayon_1.5.3 generics_0.1.3 rstudioapi_0.17.1
## [33] tzdb_0.4.0 cachem_1.1.0 splines_4.4.2 vctrs_0.6.5
## [37] Matrix_1.7-1 jsonlite_1.8.9 carData_3.0-5 car_3.1-2
## [41] hms_1.1.3 bit64_4.5.2 rstatix_0.7.2 systemfonts_1.1.0
## [45] crosstalk_1.2.1 locfit_1.5-9.10 jquerylib_0.1.4 glue_1.8.0
## [49] cowplot_1.1.3 stringi_1.8.4 gtable_0.3.6 munsell_0.5.1
## [53] pillar_1.9.0 htmltools_0.5.8.1 R6_2.5.1 rprojroot_2.0.4
## [57] vroom_1.6.5 evaluate_1.0.1 lattice_0.22-6 backports_1.5.0
## [61] broom_1.0.7 bslib_0.8.0 Rcpp_1.0.13-1 svglite_2.1.3
## [65] gridExtra_2.3 nlme_3.1-166 mgcv_1.9-1 xfun_0.49
## [69] pkgconfig_2.0.3